{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "f04fbd44",
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from openai import OpenAI\n",
    "import os\n",
    "import json\n",
    "import re\n",
    "from tqdm import tqdm\n",
    "from datasets import load_dataset\n",
    "\n",
    "# Take the key from the environment so it is never stored in the notebook.\n",
    "# When the variable is unset this passes None, and the OpenAI client then reports\n",
    "# the missing key itself.\n",
    "client = OpenAI(api_key = os.environ.get('OPENAI_API_KEY'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "e5fd8f89",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Source of the evaluation prompts: the 'test' split, whose 'question' field is used below.\n",
    "dataset = load_dataset('huseinzol05/malaysian-dialect-qa', split = 'test')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "7bdeeaa4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Companion 'test' split read during scoring for each row's from_lang / to_lang / answer.\n",
    "# NOTE(review): scoring indexes this with the same row number used for `dataset`,\n",
    "# so the two splits are assumed to be aligned row-for-row -- confirm.\n",
    "dataset_lang = load_dataset('huseinzol05/malaysian-dialect-qa-lang', split = 'test')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "07252999",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "140"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Instruction appended to every prompt; answers are later pulled out of the boxed part of the reply.\n",
    "suffix = '\\n\\nAfter that, put your final answer within $\\\\boxed{}$.'\n",
    "\n",
    "# (row index, full prompt) pairs, one per dataset row.\n",
    "questions = [\n",
    "    (idx, dataset[idx]['question'] + suffix)\n",
    "    for idx in range(len(dataset))\n",
    "]\n",
    "\n",
    "len(questions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "8fa11b9c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Lepas ujan jangan maen lari-lari, kan biyak.\\n\\nterjemah ke melayu baku\\n\\nAfter that, put your final answer within $\\\\boxed{}$.'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the prompt text of the first (index, prompt) pair.\n",
    "questions[0][1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "d04257f8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "mkdir: cannot create directory ‘openai-o3’: File exists\r\n"
     ]
    }
   ],
   "source": [
    "folder = 'openai-o3'\n",
    "# Idempotent replacement for `!mkdir`: no error when the folder already exists,\n",
    "# so the cell can be re-run and previously cached generations are kept.\n",
    "# Delete the folder by hand first if a clean regeneration is wanted.\n",
    "os.makedirs(folder, exist_ok = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "e39af693",
   "metadata": {},
   "outputs": [],
   "source": [
    "def _extract_boxed(text):\n",
    "    \"\"\"Return the content of the last `boxed{...}` in `text`, or `text` unchanged if there is none.\"\"\"\n",
    "    marker = 'boxed{'\n",
    "    start = text.rfind(marker)\n",
    "    if start == -1:\n",
    "        return text\n",
    "    start += len(marker)\n",
    "    # Walk to the brace that closes `boxed{` so nested groups such as `\\text{...}`\n",
    "    # stay whole instead of being cut at the first `}`. If the closing brace is\n",
    "    # missing, everything after the marker is kept.\n",
    "    depth = 1\n",
    "    end = len(text)\n",
    "    for pos in range(start, len(text)):\n",
    "        if text[pos] == '{':\n",
    "            depth += 1\n",
    "        elif text[pos] == '}':\n",
    "            depth -= 1\n",
    "            if depth == 0:\n",
    "                end = pos\n",
    "                break\n",
    "    # Unwrap LaTeX `\\text{...}` groups, keeping only what is inside them.\n",
    "    return re.sub(r'\\\\text\\{([^{}]*)\\}', r'\\1', text[start:end])\n",
    "\n",
    "\n",
    "def generate_answer(row, repeat = 5):\n",
    "    \"\"\"\n",
    "    Sample the model `repeat` times for one prompt and cache each extracted answer on disk.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    row : tuple\n",
    "        `(no, q)`: the question index used in the file name, and the prompt text.\n",
    "    repeat : int\n",
    "        Number of samples; sample `k` is stored as JSON at `<folder>/<no>-<k>.json`.\n",
    "\n",
    "    Notes\n",
    "    -----\n",
    "    A sample whose file already contains valid JSON is skipped, so an interrupted run\n",
    "    can simply be restarted. Each missing sample gets up to 5 attempts; every failure\n",
    "    is printed, and the sample is left missing if all attempts fail.\n",
    "    \"\"\"\n",
    "    no, q = row\n",
    "    for k in range(repeat):\n",
    "        filename = os.path.join(folder, f'{no}-{k}.json')\n",
    "        try:\n",
    "            with open(filename) as fopen:\n",
    "                json.load(fopen)\n",
    "            continue\n",
    "        except (OSError, ValueError):\n",
    "            # Missing or corrupt cache file (json.JSONDecodeError is a ValueError): regenerate.\n",
    "            pass\n",
    "\n",
    "        for _ in range(5):\n",
    "            try:\n",
    "                # The request is inside the `try` so that API/network errors are retried,\n",
    "                # not only failures while parsing or writing the answer.\n",
    "                response = client.responses.create(\n",
    "                    model = 'o3-2025-04-16',\n",
    "                    input = [\n",
    "                        {\n",
    "                            'role': 'user',\n",
    "                            'content': [\n",
    "                                {\n",
    "                                    'type': 'input_text',\n",
    "                                    'text': q\n",
    "                                }\n",
    "                            ]\n",
    "                        },\n",
    "                    ],\n",
    "                    text = {\n",
    "                        'format': {\n",
    "                            'type': 'text'\n",
    "                        }\n",
    "                    },\n",
    "                    reasoning = {\n",
    "                        'effort': 'medium'\n",
    "                    },\n",
    "                )\n",
    "\n",
    "                # `output_text` collects the reply text without assuming the message\n",
    "                # sits at a fixed position in `response.output`.\n",
    "                text = response.output_text\n",
    "                if not text:\n",
    "                    raise ValueError(f'empty response for {filename}')\n",
    "\n",
    "                with open(filename, 'w') as fopen:\n",
    "                    json.dump(_extract_boxed(text), fopen)\n",
    "                break\n",
    "            except Exception as e:\n",
    "                print(e)\n",
    "        else:\n",
    "            # Reached only when no attempt hit `break`.\n",
    "            print(f'giving up on {filename} after 5 attempts')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "0435c54a",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████| 140/140 [2:26:57<00:00, 62.98s/it]\n"
     ]
    }
   ],
   "source": [
    "# One call per prompt; generate_answer skips samples that are already cached on disk.\n",
    "for row in tqdm(questions):\n",
    "    generate_answer(row)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "fb6076f3",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sacrebleu.metrics import CHRF\n",
    "from glob import glob\n",
    "from collections import defaultdict\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "3eea82ab",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████| 140/140 [00:00<00:00, 1163.45it/s]\n"
     ]
    }
   ],
   "source": [
    "chrf = CHRF()\n",
    "# '<from_lang><><to_lang>' -> list of best-of-N chrF scores, one entry per scored row.\n",
    "pairs = defaultdict(list)\n",
    "# Row indices for which no generation file was found.\n",
    "missing = []\n",
    "\n",
    "for i in tqdm(range(len(dataset_lang))):\n",
    "    from_lang = dataset_lang[i]['from_lang']\n",
    "    to_lang = dataset_lang[i]['to_lang']\n",
    "    gt = dataset_lang[i]['answer']\n",
    "    pair = f'{from_lang}<>{to_lang}'\n",
    "    # Read from the same `folder` the generations were written to, instead of\n",
    "    # repeating its name as a second hard-coded literal.\n",
    "    files = glob(os.path.join(folder, f'{i}-*.json'))\n",
    "    scores = []\n",
    "    for f in files:\n",
    "        with open(f) as fopen:\n",
    "            d = json.load(fopen)\n",
    "        scores.append(chrf.corpus_score([d], [[gt]]).score)\n",
    "\n",
    "    if not scores:\n",
    "        # `max([])` would raise an unhelpful ValueError; record the gap and report it below.\n",
    "        missing.append(i)\n",
    "        continue\n",
    "\n",
    "    # Best-of-N: keep the highest chrF among the samples generated for this row.\n",
    "    pairs[pair].append(max(scores))\n",
    "\n",
    "if missing:\n",
    "    print(f'no generations found for {len(missing)} rows, skipped: {missing}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "21ab014c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "From: johor To: malay, score: 60.57753659569944\n",
      "From: kedah To: malay, score: 60.625255717048255\n",
      "From: pahang To: malay, score: 57.43302925503334\n",
      "From: negeri sembilan To: malay, score: 65.65416081624213\n",
      "From: kelantan To: malay, score: 51.70250963642012\n",
      "From: penang To: malay, score: 63.34775715046866\n",
      "From: melaka To: malay, score: 48.90926542921075\n",
      "From: malay To: johor, score: 50.22451676633022\n",
      "From: malay To: kedah, score: 51.021160025382784\n",
      "From: malay To: pahang, score: 46.9526364405062\n",
      "From: malay To: negeri sembilan, score: 46.1334324624407\n",
      "From: malay To: kelantan, score: 42.73765886368901\n",
      "From: malay To: penang, score: 59.24333917229933\n",
      "From: malay To: melaka, score: 55.01495010635166\n"
     ]
    }
   ],
   "source": [
    "# Mean of the per-row scores for each translation direction.\n",
    "for direction, row_scores in pairs.items():\n",
    "    src, dst = direction.split('<>')\n",
    "    print(f'From: {src} To: {dst}, score:', np.mean(row_scores))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "31296161",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "50.18967054814284"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Macro-average over the malay -> dialect directions, taken straight from `pairs`\n",
    "# rather than from numbers pasted out of a previous cell's printed output, so the\n",
    "# value always matches the current scoring run.\n",
    "scores = [\n",
    "    np.mean(v)\n",
    "    for k, v in pairs.items()\n",
    "    if k.split('<>')[0] == 'malay'\n",
    "]\n",
    "\n",
    "np.mean(scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "92c2e3e0",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "python3.10",
   "language": "python",
   "name": "python3.10"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.17"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
